import pymysql,pandas
from urllib import parse
# Settings for the module-wide database connection opened at import time.
# NOTE(review): host and credentials are hard-coded here; consider loading
# them from configuration or the environment instead.
_connection_settings = {
    "host": "172.16.10.187",
    "port": 3306,
    "user": "tidb",
    "password": 'Tidb@guest@123',
    "database": 'crawl',
    "charset": 'utf8',
}

# `conn` and `cursor` are module-level objects referenced by code later in
# this file.
conn = pymysql.connect(**_connection_settings)
cursor = conn.cursor()

def clean():
    """Delete rows of ``rose_flower_copy2`` that fail the keep criteria.

    Every row is fetched with ``select *`` and inspected by column position.
    A row is deleted when any of the following holds:

    * the value in column 12 is not contained in the value in column 4,
    * the value in column 10 is NULL (``None``),
    * the value in column 4 contains the substring '礼盒' ("gift box").

    Column 0 is used as the ``id`` of the row to delete. Each delete is
    committed immediately, so rows removed before a failure stay removed.

    Operates on the module-level ``cursor`` and ``conn``; returns ``None``.
    """
    # The id is bound by the driver instead of being formatted into the SQL
    # text; the statement itself never changes, so it is built once.
    delete_sql = "delete from rose_flower_copy2 where id=%s"

    cursor.execute("select * from rose_flower_copy2")
    # The full result set is fetched up front, so reusing the same cursor for
    # the deletes below does not disturb the iteration.
    for row in cursor.fetchall():
        row_id = row[0]
        container = row[4]
        needle = row[12]
        if needle not in container or row[10] is None or '礼盒' in container:
            cursor.execute(delete_sql, (row_id,))
            conn.commit()



def remove_duplicate():
    """Copy ``rose_flower_copy2`` into ``rose_flower_copy4`` without duplicates.

    Reads every row of ``rose_flower_copy2`` into a DataFrame, keeps the first
    row of each group that shares the same ``url``, ``href``, ``title``,
    ``info``, ``number`` and ``classify`` values, and writes the result to
    ``rose_flower_copy4``, replacing that table if it already exists. The
    DataFrame index is not written as a column.

    Both the read and the write go through the same SQLAlchemy database URL:
    pandas documents SQLAlchemy connectables and URL strings as supported
    connection types, whereas a raw pymysql connection is untested by pandas
    and triggers a warning.

    Returns ``None``.
    """
    # The password contains '@', which would otherwise be parsed as the
    # user-info/host separator, so it is percent-encoded before being embedded.
    db_url = (
        'mysql+pymysql://tidb:'
        + parse.quote_plus("Tidb@guest@123")
        + '@172.16.10.187:3306/crawl?charset=utf8mb4'
    )

    # Load the source rows.
    source_rows = pandas.read_sql("select * from rose_flower_copy2", db_url)

    # Drop duplicate rows, keeping the first occurrence of each.
    unique_rows = source_rows.drop_duplicates(
        subset=["url", "href", "title", "info", "number", "classify"],
        keep="first",
    )

    # Store the de-duplicated rows in the target table.
    unique_rows.to_sql(
        'rose_flower_copy4',
        con=db_url,
        index=False,
        if_exists='replace',
    )